# Notebook convenience cell: injects JavaScript that adds a button for
# toggling the visibility of all code-input cells. code_show starts true
# and code_toggle() runs on document ready, so the code cells are hidden
# when the notebook first loads. (Targets the classic Jupyter notebook
# DOM via jQuery's $('div.input').)
from IPython.display import HTML
HTML('''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
} else {
$('div.input').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
%matplotlib inline
# encoding: utf-8
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
import numpy as np
import math
from glob import glob
import fileinput
import re
import collections
import pickle
from alphabet_detector import AlphabetDetector
import os
import seaborn as sns
import community
from collections import deque
# notebook display tuning: truncate long DataFrames and show 2 decimals
pd.set_option('display.max_rows', 10)
pd.set_option('precision', 2)
Political alienation is a recurring theme in today's politics; recently we witnessed it playing a role in the American election as well as the British Brexit vote. Paradoxically, we are in an age where politicians can connect to voters in many new ways, and we saw social media, like Twitter, being widely used in the recent American election.
In this report we will be analysing how politicians use social media: who they talk about, what they talk about and how they talk about them. It is difficult to say what causes political alienation, but smear campaigns and negative rhetoric amongst politicians could be a symptom as well as a cause; other factors could be at play but will not be the focus of this project.
We will be using graph theory to both visualize and analyse our data, as well as text analysis — namely tf-idf and word sentiment — to analyse the contents of tweets. Our goal is to shed some light on how Twitter is used on the political stage, such as who politicians talk about and how they talk about each other.
By using tweepy we have collected the last 3,400 tweets from each of the 518 politicians on Twitter found here. By scraping and cross-referencing names from this website and manually finding and entering political parties, we have been able to identify the party of 234 of the 518 politicians. This has enabled us to construct directed graphs, using politicians' Twitter accounts as nodes and references to other politicians as edges in the graph.
To read the complete report see the explainer notebook
# Load the AFINN word-valence lexicon into a dict: word -> score (as a
# string; consumers call int() on the value). Each AFINN line is
# "<word>\t<score>" with integer scores in roughly -5..5.
# Fixes over the original: files are opened with a context manager and
# closed (fileinput.FileInput leaked the handles, and would fall back to
# reading stdin if the glob matched nothing), and the trailing newline is
# stripped from the stored score.
sentimentDict = {}
for fname in glob('AFINN-*.txt'):
    with open(fname) as afinnFile:
        for line in afinnFile:
            parts = line.rstrip('\n').split('\t')
            if len(parts) >= 2:
                sentimentDict[parts[0]] = parts[1]
# Merge the scraped account data with the manually collected party data.
# A left join keeps every politician; those without a known party get NaN
# in the party columns.
df1 = pd.read_csv('politiciansFull.csv')
df2 = pd.read_csv('politiciansPartiesUtf.csv')
fullDf = pd.merge(df1, df2, how='left', on=['name'])
# using the left-right spectrum collected from http://www.altinget.dk/artikel/det-nye-politiske-kompas
# we have indexed every politician with a corresponding index on the left-right scale
parties = ["Enhedslisten", 'Alternativet', "Socialistisk Folkeparti",
           "Socialdemokraterne", 'Radikale Venstre', 'Venstre',
           'Det Konservative Folkeparti', 'Liberal Alliance', 'Dansk Folkeparti']
# fixed plotting color per party (roughly the parties' official colors)
colorCoding = {'Venstre':'b', 'Socialdemokraterne': 'r', 'Radikale Venstre': 'm',
               'Enhedslisten':'#9F000F', 'Socialistisk Folkeparti':'#C24641', 'Liberal Alliance':'#82CAFF',
               'Dansk Folkeparti':'#FFD801', 'Det Konservative Folkeparti': '#347C17', 'Alternativet': '#00FF00'}
# bare expression: displays the merged DataFrame in the notebook
fullDf
Our data set is 66.6 MB in size and contains, in addition to the tweet text, a timestamp that indicates when the tweet was written and a tweet id.
From the collected data we have built two different networks: a directed graph and a directed multigraph. The two have different advantages — the regular graph allows for more analysis to be performed, while the multigraph is a better model of the information, since it models the references as edges one-to-one.
# Add every politician in fullDf as a node in G, keyed by Twitter handle.
# Members of a known party additionally get an index on the left-right
# political spectrum (0 = furthest left .. 8 = furthest right) and a
# political block (0 = red/left bloc, 1 = blue/right bloc).
def loadInNodes(G):
    # party -> (left-right spectrum index, block); unknown party strings
    # fall back to (-1, -1), matching the original elif chain's defaults
    spectrumByParty = {
        'Alternativet': (0, 0),
        'Radikale Venstre': (1, 0),
        'Enhedslisten': (2, 0),
        'Socialistisk Folkeparti': (3, 0),
        'Socialdemokraterne': (4, 0),
        'Det Konservative Folkeparti': (5, 1),
        'Liberal Alliance': (6, 1),
        'Venstre': (7, 1),
        'Dansk Folkeparti': (8, 1),
    }
    for index, row in fullDf.iterrows():
        if pd.isnull(row['Party']):
            # politicians with no known party get the party 'None' and no
            # spectrum/block attributes (positional attr dict = nx 1.x API)
            G.add_node(row['twitterAccount'],
                       {'name': row['name'], 'followers': row['followers'],
                        'following': row['following'], 'tweets': row['tweets'],
                        'created_at': row['created_at'], 'type': row['type'],
                        'party': 'None'})
        else:
            psIndex, block = spectrumByParty.get(row['Party'], (-1, -1))
            G.add_node(row['twitterAccount'],
                       {'name': row['name'], 'followers': row['followers'],
                        'following': row['following'], 'tweets': row['tweets'],
                        'created_at': row['created_at'], 'type': row['type'],
                        'party': row['Party'], 'politicalSpectrum': psIndex,
                        'Block': block})
# Load in edges based on each reference made from a politician to another
# politician. For every tweet that contains another politician's handle,
# an edge is added from author to mentioned politician carrying the
# tweet's sentiment: the mean AFINN valence of its lexicon words, or None
# when no word of the tweet is in the lexicon.
# For a plain DiGraph repeated references are aggregated: 'weight' counts
# them and 'sentiment' accumulates per-tweet scores (note: the stored
# value is a sum of per-tweet averages, it is never re-averaged here).
# For a MultiDiGraph every single reference becomes its own edge.
def loadInEdges(G):
    for index, row in fullDf.iterrows():
        foundCSV = False
        try:
            # per-account tweet dumps are named after the handle minus
            # its leading '@'
            tweets = pd.read_csv('cleaned_politikere_tweets/' +
                                 row['twitterAccount'][1:] + ".csv")
            foundCSV = True
        except:
            # NOTE(review): bare except deliberately skips accounts whose
            # CSV is missing or unreadable (best-effort load), but it also
            # hides genuine parse errors
            pass
        if foundCSV:
            for tweet in tweets['text']:
                for politician in fullDf['twitterAccount']:
                    if str(politician) in tweet:
                        # score the tweet: average AFINN valence over the
                        # words that appear in the lexicon
                        sentiment = 0
                        words = tweet.replace(',','').replace('.','').split(' ')
                        count = 0
                        for word in words:
                            if word in sentimentDict:
                                sentiment += int(sentimentDict[word])
                                count += 1
                        if count != 0:
                            sentiment = sentiment/float(count)
                        else:
                            # no scored words -> sentiment unknown
                            sentiment = None
                        if type(G) == nx.DiGraph:
                            # G.edge[...] is the networkx 1.x edge-attribute API
                            if G.has_edge(row['twitterAccount'],politician):
                                if sentiment != None:
                                    # an edge whose sentiment is still None
                                    # takes the first concrete score
                                    if G.edge[row['twitterAccount']][politician]['sentiment'] != None:
                                        G.edge[row['twitterAccount']][politician]['sentiment'] += int(sentiment)
                                    else:
                                        G.edge[row['twitterAccount']][politician]['sentiment'] = int(sentiment)
                                G.edge[row['twitterAccount']][politician]['weight'] += 1
                            else:
                                G.add_edge(row['twitterAccount'], politician, sentiment=sentiment, weight=1)
                        elif type(G) == nx.MultiDiGraph:
                            # one edge per reference; sentiment may be None
                            G.add_edge(row['twitterAccount'], politician, sentiment=sentiment)
    # remove all nodes that are not connected to any other politician
    G.remove_nodes_from(nx.isolates(G))
# Draw the given graph: node size scales with follower count, node color
# is the party color (grey when the party is unknown), and politicians
# with more than 20000 followers are labelled by name. `alpha` controls
# edge transparency (lower for the denser multigraph).
def drawPoliGraph(G, alpha):
    fig = plt.figure(1, figsize=(15,15))
    # k > default spreads the spring layout out to reduce overlap
    pos=nx.spring_layout(G, k=0.9)
    for node in G.nodes():
        color = 'w'
        if G.node[node]['party'] in colorCoding:
            color = colorCoding[G.node[node]['party']]
        else:
            # unknown/unmapped party -> neutral grey
            color = '#999966'
        nx.draw_networkx_nodes(G,pos,
                               nodelist=[node],
                               node_color=color,
                               node_size=G.node[node]['followers']/80 + 5,alpha=0.8)
        if G.node[node]['followers']>20000:
            # NOTE(review): .decode() implies Python 2 byte strings here
            nx.draw_networkx_labels(G, pos, font_size= 10,font_color='w', font_weight='bold',labels= {node.decode("utf-8"):G.node[node]['name'].decode("utf-8")})
    nx.draw_networkx_edges(G, pos, arrows=False, width=0.3, alpha=alpha)
Below are the two networks, regular graph and multi graph.
# Build both network variants from the same data and draw them.
G = nx.DiGraph()
loadInNodes(G)
loadInEdges(G)
multiG = nx.MultiDiGraph()
loadInNodes(multiG)
loadInEdges(multiG)
# the multigraph has far more edges, so draw it with a lower edge alpha
drawPoliGraph(G, 0.1)
drawPoliGraph(multiG,0.02)
By having a look at the degree distribution of the multigraph, we can see like most social networks it follows the degree distribution of a scale free network.
# Degree-distribution plots for both networks, linear and log-log, as a
# single 2x2 figure. The original called plt.subplots() twice, which
# created two separate figures and split the four panels across them,
# and computed an unused bar `width`; both issues are fixed and the
# duplicated code is factored into a helper.
def plotDegreeDistribution(degrees, linearPos, logPos, title):
    # one integer histogram bin per degree value
    hist, bins = np.histogram(degrees, range(np.amin(degrees), np.amax(degrees) + 1))
    center = (bins[:-1] + bins[1:]) / 2
    plt.subplot(linearPos)
    plt.plot(center, hist, 'ro', markersize=4)
    plt.title(title)
    plt.subplot(logPos)
    plt.loglog(center, hist, 'ro', markersize=4)
    plt.title(title + ' on a log scale')

plt.figure(figsize=(15, 10))
plt.subplots_adjust(hspace=0.4)
plotDegreeDistribution(G.degree(G.nodes()).values(), 221, 222,
                       'distribution of degree in the politicians network')
plotDegreeDistribution(multiG.degree(multiG.nodes()).values(), 223, 224,
                       'distribution of degree in a multi graph polticians network')
plt.show()
Especially the directed multigraph seems to follow a power-law distribution.
In this section we will be covering what the politicians are talking about on twitter. First we will be describing what measures we have taken to clean and prepare the data, as well as which methods we have been using to analyse differences in word patterns between parties.
Some of our preliminary attempts at plotting the word clouds resultet in some strange plots. Like the following word cloud plot of the tf-idf from the top 400 words used by Dansk Folkeparti.
By searching through the tweets of members of Dansk Folkeparti, we found out that one of the members of the party had connected her Twitter account to her player account in an iPad game. The game was then auto-tweeting her accomplishments in the game; this went on for some weeks, resulting in an abundance of words like game, tribez, coins, etc.
A similar case was in the following word cloud plot.
Here a politician had connected her Endomondo running application to her Twitter profile, causing the words running, here: and km to show up.
Our solution to this problem was to create a cleaner function (can be found in explainer notebook) that through a key identifier, like, '#Endomondo' or 'tribez' finds lines in csv files and deletes them.
In the following section we will be showing what we found to be the most defining words for each party amongst the top 400 most used words for all parties. This will be done by using an algorithm called tf-idf, that calculates a words importance based on how often it occurs in other texts.
# Load the pickled per-party word-count dictionaries found under `path`
# and split every token into one of three bags: hashtags (leading '#'),
# references (leading '@') and regular words. Tokens occurring fewer
# than `minOcc` times (and empty tokens) are dropped. Results are stored
# per party in the three caller-supplied dicts.
def loadInSepOcc(BoWsDict, RefsDict, hashtagsDict, path, minOcc=0):
    for party in parties:
        refDict = collections.OrderedDict()
        hashDict = collections.OrderedDict()
        wordDict = collections.OrderedDict()
        # context manager ensures the pickle file handle is closed (the
        # original leaked it); a doubled '/' when `path` already ends in
        # one is harmless on POSIX
        with open(path + "/" + party + "WordCount.p", "rb") as pickleFile:
            BoW = pickle.load(pickleFile)
        for k, v in BoW.items():
            if v >= minOcc and len(k) > 0:
                if k[0] == '#':
                    hashDict[k] = v
                elif k[0] == '@':
                    refDict[k] = v
                else:
                    wordDict[k] = v
        BoWsDict[party] = wordDict
        RefsDict[party] = refDict
        hashtagsDict[party] = hashDict
def calcTfIdf(BoW, BoWList, topAmount=0):
    """Calculate tf-idf scores for the first `topAmount` words of the
    bag-of-words `BoW`, scored against the corpus of bags in the dict
    `BoWList` (party -> bag-of-words).

    `topAmount` of 0 means "use all words of BoW". Returns a dict
    word -> tf-idf. The idf is smoothed (+1 in numerator and
    denominator), so a word present in every bag scores 0.
    """
    if topAmount == 0:
        topAmount = len(BoW)
    tfIdfDict = {}
    # total word count, to turn raw counts into term frequencies
    total = float(sum(BoW.values()))
    # only score the first `topAmount` words (bags are OrderedDicts, so
    # these are the entries the pickling step put first)
    words = list(BoW)[:topAmount]
    # truncate every corpus document to its own top words for the
    # document-frequency lookup
    documents = []
    for D in BoWList.values():
        if topAmount != len(BoW):
            documents.append(dict((k, D[k]) for k in list(D)[:topAmount]))
        else:
            # BUG FIX: the corpus must contain each document D -- the
            # original appended the query bag `BoW` here, producing
            # len(BoWList) copies of it and zeroing every idf
            documents.append(D)
    for word in words:
        # number of corpus documents containing the word
        n = sum(1 for x in documents if word in x)
        tf = BoW[word] / total
        idf = math.log((len(BoWList) + 1) / float(1 + n))
        tfIdfDict[word] = tf * idf
    return tfIdfDict
def convertToLoT(dictionary):
    """Convert a word -> score dict to a list of (word, score) tuples
    for WordCloud plotting, dropping Danish and English stop words."""
    # build the combined stop-word set once, outside the comprehension
    ignored = stopWords.union(STOPWORDS)
    return [(word.decode('utf-8'), value)
            for word, value in dictionary.items()
            if word.lower() not in ignored]
import random
from scipy.misc import imread
def color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Random-hue HSL color function for WordCloud.recolor.

    Uses the `random_state` (a random.Random instance) that WordCloud
    passes in, so recolor(..., random_state=3) is actually reproducible;
    the original ignored it and always drew from the global RNG.
    """
    rng = random_state if random_state is not None else random
    return "hsl(%d, 100%%, 50%%)" % rng.randint(0, 270)
def plotWordCloud(TuplelistWord, TuplelistRef, TuplelistTag, party):
    """Plot three word clouds side by side for one party: regular words,
    @-references and #-hashtags (each a list of (word, score) tuples)."""
    f, axarr = plt.subplots(1, 3, figsize=(15, 30))
    # all three clouds share the same silhouette; the cloudMask, birdMask
    # and splatMask images the original also loaded were never used, so
    # those reads have been dropped
    cloudMask2 = imread("Pictures/mask-cloud.png")
    # identical cloud setup for all three bags -> one loop instead of
    # three copies of the same code
    for ax, tuples in zip(axarr, (TuplelistWord, TuplelistRef, TuplelistTag)):
        wc = WordCloud(background_color="white", max_words=40, mask=cloudMask2)
        wc.generate_from_frequencies(tuples)
        ax.imshow(wc.recolor(color_func=color_func, random_state=3))
        ax.axis("off")
    # the party name titles the middle cloud, centering it over the row
    axarr[1].set_title(party, fontsize=20, fontweight='bold')
BoWsDict = {}
BoRDict = {}
BoHDict = {}
# import the bag of words from each party, split into regular words /
# @-references / #-hashtags, from the pickled word counts
loadInSepOcc(BoWsDict, BoRDict, BoHDict, "politiciansWordCount/")
# Stop words in both Danish and English, written as one comma-separated
# string for readability. Every entry must be free of whitespace: the
# original had a stray space after "eller, ", which silently produced
# the token " en" so the common Danish word "en" never matched -- fixed.
stopWords = ("mr,na,er,om,rt,i,af,alle,andet,andre,at,begge,da,de," +
             "den,denne,der,deres,det,dette,dig,din,dog,du,ej,eller," +
             "en,end,ene,eneste,enhver,et,fem,fire,flere,fleste,for,fordi," +
             "forrige,fra,få,før,god,han,hans,har,hendes,her,hun,hvad,hvem," +
             "hver,hvilken,hvis,hvor,hvordan,hvorfor,hvornår,i,ikke,ind,ingen," +
             "intet,jeg,jeres,kan,kom,kommer,lav,lidt,lille,man,mand,mange,med," +
             "meget,men,mens,mere,mig,ned,ni,nogen,noget,ny,nyt,nær,næste," +
             "næsten,og,op,otte,over,på,se,seks,ses,som,stor,store,syv,ti,til," +
             "to,tre,ud,var").split(',')
stopWords = set(stopWords)
from wordcloud import WordCloud, STOPWORDS
# plot the tf-idf of the top 400 words from each party, split into
# regular words, @-references and #-hashtags
for party in parties:
    plotWordCloud(convertToLoT(calcTfIdf(BoWsDict[party], BoWsDict, 400)),
                  convertToLoT(calcTfIdf(BoRDict[party], BoRDict, 400)),
                  convertToLoT(calcTfIdf(BoHDict[party], BoHDict, 400)),
                  party)
As we can see, many of the words scored highest by the tf-idf algorithm are words that are descriptive of the political focus areas of the given party — such as border control (in Danish: grænsekontrol) for the right-wing, nationalist-conservative party Dansk Folkeparti, or green (in Danish: grøn) for the left-wing Alternativet, probably referring to green energy.
But what is also interesting is that parties tend to refer much to themselves, their party members, and events tied to their party, more than they refer to other parties, party members or events. We will be covering this in further detail in the following section.
In this section we will using graph tools and theory to analyze who politicians generally talk about.
The following are the top 5 politicians that are the most central in the network are, with regards to how much they are referred to, how much they refer to others and how much they link different communities together.
# in-degree centrality: who is referred to the most
inCentral = nx.in_degree_centrality(G)
print "Top 5 Most refered to: " , sorted(inCentral, key=lambda i: inCentral[i], reverse=True)[:5]
# out-degree centrality: who refers to others the most
outCentral = nx.out_degree_centrality(G)
print "Top 5 Most referring: " ,sorted(outCentral, key=lambda i: outCentral[i], reverse=True)[:5]
# betweenness centrality: who links different communities together
between = nx.betweenness_centrality(multiG)
print "Top 5 Most linking: " ,sorted(between, key=lambda i: between[i], reverse=True)[:5]
By calculating the assortativity in relation to degree on the multigraph, we can see that politicians who are often referred to tend to refer to each other, while politicians who are not often referred to likewise tend to refer to each other. This is not surprising news, as social networks tend to have these attributes.
# degree assortativity: Pearson correlation of node degrees across edges
assortativity = nx.degree_pearson_correlation_coefficient(multiG)
print assortativity
What is more interesting is to calculate the assortativity according to their index on the political left-to-right spectrum. We have indexed all party members according to their party's place in the order from left to right, found on this website. The scale has been developed by Kenneth Thue Nielsen and is based on numbers from the research agency Norstat.
# assortativity over the categorical left-right spectrum attribute
assortativity = nx.attribute_assortativity_coefficient(multiG, 'politicalSpectrum')
print assortativity
Indicating that politicians tend to reference other politicians that are within the same political spectrum as themselves.
If we calculate the assortativity according to which political block they are part of, we can see that the assortativity is also quite high, but lower than that of the political spectrum.
By block we refer to whether a party is said to belong to the red or blue block, depending on if they tend to vote for the left wing or the right wing.
# assortativity over the red/blue political block attribute
assortativity = nx.attribute_assortativity_coefficient(multiG, 'Block')
print assortativity
By calculating the modularity, we can see whether the parties are good distributions of communities according to who they reference. This however, cannot be performed on multi graphs or directed graphs. To accomodate that the simple directed graph aggregates references, we have fitted the edges with weights to indicate that a reference has occured several times.
# Louvain community detection needs an undirected weighted graph, so
# convert the aggregated DiGraph first.
Gun = G.to_undirected()
# get the parties of the politicians; used as a "ground truth" partition
politiciansPartyDict = nx.get_node_attributes(Gun, "party")
# calculate the best partition of the parties
partitions = community.best_partition(Gun, weight='weight')
# modularity of the party-based partition vs. the Louvain-optimal one
print community.modularity(politiciansPartyDict, Gun, weight='weight')
print community.modularity(partitions, Gun, weight='weight')
# number of communities in the optimal partition
print len(set(partitions.values()))
Showing that the distribution of communities based on parties does not differ that much from the optimal distribution, which would be to separate the politicians into 20 different communities. If we try to calculate the optimal distribution while keeping the original number of parties, we get:
# re-run the optimisation seeded with the party-based partition
partitions = community.best_partition(Gun, weight='weight', partition=politiciansPartyDict)
print community.modularity(partitions, Gun, weight='weight')
We can then make a confusion matrix filled with percentages to visualize what percentage of each party should be redistributed into new parties to construct the optimal distribution.
# Build a community x party "confusion matrix": entry [c][party] is the
# percentage of `party`'s members that the optimal partition places in
# community c.
# Group nodes by party and by community once up front, instead of
# rescanning both whole dicts inside the double loop as the original did.
nodesByParty = collections.defaultdict(set)
for node, partyAttr in politiciansPartyDict.items():
    nodesByParty[partyAttr].add(node)
nodesByPartition = collections.defaultdict(set)
for node, comm in partitions.items():
    nodesByPartition[comm].add(node)
prctMatrix = []
for c in set(partitions.values()):
    counts = []
    for party in parties:
        overlap = nodesByPartition[c] & nodesByParty[party]
        counts.append(float(len(overlap)) / len(nodesByParty[party]) * 100)
    prctMatrix.append(counts)
Dprct = pd.DataFrame(prctMatrix, columns=parties)
# bare expression: displays the matrix in the notebook
Dprct
This table is of course solely based on who politicians of each party tend to reference, and therefore says nothing about which parties should be formed based on political views. It does, however, tell us that politicians tend to refer to politicians within their own party more than they refer to politicians from other parties. This seems quite positive, and in the following section we will be covering how they tend to tweet in general and about each other.
In this section we will explore the sentiment valence of words written by politicians on Twitter. We will look into what it looks like over time as well as what it looks like when the politicians reference each other in a tweet.
We have used MRJob written by Yelp, which is a MapReduce library for Python to calculate the sentiment of a tweet and aggregate it on party by party basis. The code for our MapReduce job can be found in Appendix E.
Here we import the files and you can see that there are many NaN values which is because of how we join the files together into a DataFrame. There are fewer and fewer NaN values over time when the politicians of the parties become more active
# Join the per-party MapReduce sentiment time series (tab-separated
# date/sentiment files) into one DataFrame with a column per party; an
# outer join keeps every date, leaving NaN where a party has no tweets.
sentiDf = pd.DataFrame()
for party in parties:
    senti = pd.read_csv('partier/sentiment_'+party+'.tsv',
                        sep='\t',
                        parse_dates=[0],
                        header=None,
                        names = ["Date", party],
                        index_col=0
                        )
    sentiDf = sentiDf.join(senti, how='outer')
# bare expression: displays the joined frame in the notebook
sentiDf
In the below plot you can see the number of NaN values over time just to get a feeling with how active the different parties are on Twitter. The y-axis corresponds to how many NaN values there are per row.
# 10-day moving average of NaN counts per row: a proxy for how many
# parties were inactive on Twitter around each date
sentiDf.isnull().sum(axis=1).rolling(window=10).mean().plot(figsize=(15,4))
It looks like halfway through 2014 might be a good date to perform the rest of the analysis on as the activity is picking up there
# One subplot per party: raw daily sentiment since mid-2014, drawn in the
# party's color on a shared x/y scale.
f, axarr = plt.subplots(9, sharex=True, sharey=True, figsize=(15,12))
plt.tight_layout(h_pad=1.2)
for idx, party in enumerate(parties):
    series = sentiDf['2014-06-01' < sentiDf.index][party].dropna()
    axarr[idx].set_title(party, fontsize=15)
    axarr[idx].plot(series, c=colorCoding[party])
plt.show()
We can clearly see that most party's politicians have become more and more active on Twitter during the last years. Some parties did not have active people on Twitter before 2012. Some had breaks for a long time and the data is generally really sparse before 2013-2014. Which is why we will focus on tweets from 2014 and onwards.
It is hard to see what is going on in the plots because of all the noise. Let's take a look at it again where we average over a period. We tried averaging over many intervals but we will only show it for the last 7 days to see if we can see a difference.
# Same per-party plots, but smoothed with a 7-day rolling mean to cut
# through the day-to-day noise (from 2014 onwards).
f, axarr = plt.subplots(9, sharex=True, sharey=True, figsize=(15,12))
plt.tight_layout(h_pad=1.2)
for idx, party in enumerate(parties):
    smoothed = (sentiDf['2014-01-01' < sentiDf.index][party]
                .dropna().rolling(window=7, min_periods=0).mean())
    axarr[idx].set_title(party, fontsize=15)
    axarr[idx].plot(smoothed, c=colorCoding[party])
plt.show()
It is now much easier to see what is happening but we only see it on a 7 day period though. Some patterns we can notice are: all the parties are clearly more positive than they are negative, we can see they hover around 0,6 in average (we calculated it to be 0,633); and there are some tendencies that can be spotted for example a period in July where all the parties seem to have a more positive sentiment value. Some parties look like they might have a high correlation so let's look into that
Calculating the correlation between the sentiment of the parties during a moving window average of 7 days didn't give us much insight as all the values were close to 0. The two highest values were:
If we average over the last 30 days we see more interesting correlations. Venstre and Dansk Folkeparti is now at 0,48, and Det Konservative Folkeparti and Liberal Alliance are those that have the highest negative value of -0,325120.
# pairwise correlation between parties of the 30-day moving-average
# sentiment, from 2014 onwards
sentiDf[sentiDf.index > '2014-01-01'].rolling(window=30, min_periods=0).mean().corr()
Looking at specific dates which might be important to politicians
# mean sentiment across all parties on dates of notable Danish events
print "Queen", sentiDf['2015-04-16' == sentiDf.index].mean().mean() # the Danish Queen's birthday
print "Shooting", sentiDf['2015-02-14' == sentiDf.index].mean().mean() # Copenhagen shooting at Krudttoenden
print "Space", sentiDf['2015-09-02' == sentiDf.index].mean().mean() # first Dane in space
print "Gold OL", sentiDf['2016-08-15' == sentiDf.index].mean().mean() # Danish swimmer wins Olympic gold
One last date which should be polarizing for the parties according to their politics is:
Where some parties are very happy and others are not.
# per-party mean sentiment on 2015-12-03 -- presumably the day of the
# Danish EU opt-out referendum (confirm against the report text)
print sentiDf['2015-12-03' == sentiDf.index].mean()
In the following section we will be covering what sentiment profile polticians have when they refer to politicians from various parties. By embedding each edge with the sentiment of the tweet that referred to another politician, we can calculate what the average sentiment is between all the parties.
We calculated the average sentiment for each party towards all other parties, including themselves, and plotted them in the below figure.
# Average the per-tweet sentiment carried on the multigraph's edges into
# a party -> party matrix: result[A][B] is the mean sentiment of tweets
# in which a member of party A referenced a member of party B.
def calcPartyReferenceSentiment(multiG):
    sentimentSums = {}
    referenceCounts = {}
    for source, target, data in multiG.edges(data=True):
        currentParty = multiG.node[source]['party']
        otherParty = multiG.node[target]['party']
        # skip edges without a sentiment score and edges touching
        # politicians whose party is unknown
        if data['sentiment'] is None or currentParty not in parties or otherParty not in parties:
            continue
        sentimentSums.setdefault(currentParty, {})
        referenceCounts.setdefault(currentParty, {})
        sentimentSums[currentParty][otherParty] = \
            sentimentSums[currentParty].get(otherParty, 0) + data['sentiment']
        referenceCounts[currentParty][otherParty] = \
            referenceCounts[currentParty].get(otherParty, 0) + 1
    # normalise sums into averages. Unlike the original, this only
    # iterates the pairs that actually occurred, so a party pair with no
    # references no longer raises a KeyError; for pairs that are present
    # the result is identical.
    for currentParty, sums in sentimentSums.items():
        for otherParty, total in sums.items():
            sums[otherParty] = total / float(referenceCounts[currentParty][otherParty])
    return sentimentSums

partiesReferenceSentiment = calcPartyReferenceSentiment(multiG)
# Plot a 3x3 grid of horizontal bar charts: one subplot per referring
# party, bars showing its average sentiment towards every party, sorted
# from most to least positive and colored with the target party's color.
def plotPoliticalPartySentiment(partiesReferenceSentiment):
    f, axarr = plt.subplots(3, 3, sharex=True, figsize=(10,18))
    countx = 0
    county = 0
    colors = []
    plt.tight_layout(pad=0.4, w_pad=10.5, h_pad=5)
    sns.set_style({'xtick.major.size': 0.0})
    for party in parties:
        axarr[countx][county].set_title(party, fontsize=20)
        # target parties ordered by descending sentiment...
        partyNames = sorted(partiesReferenceSentiment[party], key=lambda i: partiesReferenceSentiment[party][i], reverse=True)
        colors = [colorCoding[partyName] for partyName in partyNames]
        # ...and the matching values, sorted the same way
        sentimentValues = sorted(partiesReferenceSentiment[party].values(), reverse=True)
        # NOTE(review): .decode() implies Python 2 byte strings here
        sns.barplot(x=sentimentValues, y=[y.decode('utf-8') for y in partyNames], ax=axarr[countx][county], palette=colors)
        # fill the grid column-major: walk rows, then advance the column
        countx += 1
        if countx % 3 == 0:
            county += 1
            countx = 0
plotPoliticalPartySentiment(partiesReferenceSentiment)
Based upon these plots there seems to be a small tendency for parties to write more positively about parties they share political values with.
As we stated earlier in the report, it is quite difficult to pinpoint the reasons for political alienation, and the reasons are probably numerous. Our research seems to indicate that politicians are not inciting disbelief in politicians by promoting negative rhetoric about political adversaries on Twitter.
However, our simple sentiment analysis is good at determining whether a text is generally positive or negative, but not good at deciphering complex sentence structure and intricate natural-language details — such as whether a positive/negative word refers to a specific person, or whether a positive word is used sarcastically. It is therefore not possible to determine from our analysis alone whether politicians genuinely write about each other in positive terms on Twitter. But we can determine that politicians generally talk more about their own party members and members of like-minded parties than about politicians from parties on the other side of the political spectrum.